'''
Created on Oct 25, 2011

@author: hp
'''

def _read_lines(fname):
    """Return the content of ``fname`` split on newline characters."""
    # ``with`` closes the handle even when reading raises.
    with open(fname, 'r') as handle:
        return handle.read().split("\n")


def _first_containing(tokens, marker):
    """Return the first token that contains ``marker``, or '' if none does."""
    for token in tokens:
        if marker in token:
            return token
    return ''


def compare_file(fname1, fname2):
    """Compare the subject/object fields of ``fname1`` against ``fname2``.

    Every non-empty line of ``fname1`` is split on tabs.  The first field is
    used as the sentence id, the first field containing ``'subj'`` as the
    subject and the first field containing ``'dobj'`` as the object.  The
    first remaining line of ``fname2`` that starts with the id is used as the
    reference line and is consumed once it has been compared.

    Progress information is printed to standard output.

    Returns a tuple ``(sent_count, subj_count, obj_count, wrong_sent,
    number_line)`` where

    * ``sent_count`` counts sentences whose subject and object (whichever of
      them are present) were all found in the reference line,
    * ``subj_count`` / ``obj_count`` count subjects / objects found in the
      reference line,
    * ``wrong_sent`` lists the ``fname1`` lines for which nothing was found,
    * ``number_line`` is the number of non-empty lines in ``fname1``.

    Raises whatever ``open`` raises when one of the files cannot be read.
    """
    test_lines = _read_lines(fname1)
    ref_lines = _read_lines(fname2)

    sent_count = 0
    subj_count = 0
    obj_count = 0
    number_line = 0
    wrong_sent = []

    for line in test_lines:
        if not line:
            continue
        # Only real examples are counted; blank lines (e.g. the empty string
        # produced by a trailing newline) must not inflate the total.
        number_line += 1

        tok = line.split("\t")
        sent_id = tok[0]
        subj = _first_containing(tok, 'subj')
        obj = _first_containing(tok, 'dobj')
        print("%s %s %s" % (sent_id, subj, obj))

        for idx, ref in enumerate(ref_lines):
            # NOTE(review): this is a prefix match, so id "1" also matches a
            # reference line starting with "10" -- confirm the ids cannot
            # collide or that both files are in the same order.
            if not ref.startswith(sent_id):
                continue
            print(ref)
            if not subj and not obj:
                # Nothing to compare: the reference line is left unconsumed
                # and scanning continues (only echoing further matches).
                continue

            found_subj = bool(subj) and subj in ref
            found_obj = bool(obj) and obj in ref
            if found_subj:
                subj_count += 1
            if found_obj:
                obj_count += 1

            if not (found_subj or found_obj):
                wrong_sent.append(line)
            elif (found_subj or not subj) and (found_obj or not obj):
                # Every relation present in the test line was matched.
                sent_count += 1
                if subj and obj:
                    print('Plus 1')

            # Consume the reference line so it cannot be matched twice.  The
            # loop is left immediately, so mutating the list here is safe.
            del ref_lines[idx]
            break

    return (sent_count, subj_count, obj_count, wrong_sent, number_line)

def compare_verb(verb, number_sense,
                 test_dir="../../data/testSO", dep_dir="../../data/dep_bnc"):
    """Aggregate ``compare_file`` results over all senses of ``verb``.

    For every sense number from 1 to ``number_sense`` (inclusive) the file
    ``<test_dir>/<verb>.doc.<n>.complete.dp.POS.test`` is compared against
    ``<dep_dir>/<verb>.doc.<n>.complete.dp``.  The summed counts and the
    overlap ratios (count divided by the total number of examples) are
    printed to standard output; nothing is returned.

    ``test_dir`` and ``dep_dir`` default to the directories that were
    previously hard-coded, so existing two-argument calls are unaffected.
    """
    total_sent_count = 0
    total_subj_count = 0
    total_obj_count = 0
    total_wrong_sent = []
    total_examples = 0

    for sense_id in range(1, number_sense + 1):
        print(sense_id)
        fname1 = "%s/%s.doc.%d.complete.dp.POS.test" % (test_dir, verb, sense_id)
        fname2 = "%s/%s.doc.%d.complete.dp" % (dep_dir, verb, sense_id)
        (sent_count, subj_count, obj_count,
         wrong_sent, number_line) = compare_file(fname1, fname2)
        total_sent_count += sent_count
        total_subj_count += subj_count
        total_obj_count += obj_count
        total_wrong_sent.extend(wrong_sent)
        total_examples += number_line

    def ratio(count):
        # Without any example there is no overlap to report; avoid dividing
        # by zero and report 0.00 instead.
        if total_examples == 0:
            return 0.0
        return count * 1.0 / total_examples

    print("%d %d %d" % (total_sent_count, total_subj_count, total_obj_count))
    print(len(total_wrong_sent))
    print(total_examples)
    print("Overlap sentence = %.2f" % ratio(total_sent_count))
    print("Overlap subject = %.2f" % ratio(total_subj_count))
    print("Overlap object = %.2f" % ratio(total_obj_count))

if __name__ == "__main__":
    # Script entry point: report the overlap for the five senses of "acquire".
    compare_verb(verb="acquire", number_sense=5)